In [40]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
Chi-square distribution
In [41]:
# Draw 100 observations from a chi-square distribution with 2 degrees of freedom.
chisquare = np.random.chisquare(df=2, size=100)
In [42]:
# Histogram of the chi-square draws (matplotlib's default binning).
ax = plt.gca()
ax.hist(chisquare)
plt.show()
In [43]:
# Histogram of the chi-square draws with the mean (solid line) and
# mean +/- one standard deviation (dashed lines) marked in blue.
ax = plt.gca()
ax.hist(chisquare)
centre = chisquare.mean()
spread = chisquare.std()  # ndarray.std() default, i.e. ddof=0
for position, dash in ((centre, 'solid'),
                       (centre + spread, 'dashed'),
                       (centre - spread, 'dashed')):
    ax.axvline(position, color='b', linestyle=dash, linewidth=2)
plt.show()
In [44]:
# Descriptive statistics of the chi-square draws:
# mean, median, variance (ddof=0), standard deviation (ddof=1), standard error.
c1 = chisquare.mean()
c2 = np.median(chisquare)
c3 = chisquare.var()
c4 = chisquare.std(ddof=1)
c5 = c4 / np.sqrt(len(chisquare))
for stat in ('chisquare', c1, c2, c3, c4, c5):
    print(stat)

# Resample 100 values (with replacement) from the chi-square draws and plot them.
sample = pd.DataFrame({'chisquare': np.random.choice(chisquare, size=100)})
sample.hist()
plt.show()

# Compare the resample's mean and standard deviation with the values above.
c6 = sample.mean()
c7 = sample.std(ddof=1)
print(c6, c7, sep='\n')
Dirichlet distribution
In [45]:
# Draw 100 observations from a Dirichlet distribution with concentration (1, 2).
# Each observation is a 2-component vector, so the result has shape (100, 2).
dirichlet = np.random.dirichlet(alpha=(1, 2), size=100)
In [46]:
# Histogram of the Dirichlet draws. The input is 2-D, so matplotlib draws
# one set of bars per column (component), side by side.
ax = plt.gca()
ax.hist(dirichlet)
plt.show()
In [47]:
# Histogram of the Dirichlet draws with mean (solid) and mean +/- one standard
# deviation (dashed) marked.
# Each draw is a 2-component vector whose entries sum to 1, so statistics taken
# over the flattened array are uninformative (the overall mean is always 0.5).
# The markers are therefore computed per component (axis=0) and drawn in the
# same colour as that component's bars.
component_colours = ['tab:blue', 'tab:orange']
plt.hist(dirichlet, color=component_colours, label=['component 0', 'component 1'])
component_means = dirichlet.mean(axis=0)
component_stds = dirichlet.std(axis=0)
for mu, sigma, colour in zip(component_means, component_stds, component_colours):
    plt.axvline(mu, color=colour, linestyle='solid', linewidth=2)
    plt.axvline(mu + sigma, color=colour, linestyle='dashed', linewidth=2)
    plt.axvline(mu - sigma, color=colour, linestyle='dashed', linewidth=2)
plt.legend()
plt.show()
In [48]:
# Descriptive statistics of the Dirichlet draws, computed per component.
# `dirichlet` holds one 2-component vector per row and every row sums to 1, so
# reducing over the flattened array always gives a mean of 0.5 and mixes the two
# components' spreads. Reducing along axis=0 yields one value per component.
d1 = np.mean(dirichlet, axis=0)
d2 = np.median(dirichlet, axis=0)
d3 = np.var(dirichlet, axis=0)
d4 = np.std(dirichlet, axis=0, ddof=1)
# len(dirichlet) is the number of draws (rows), which is the sample size of each
# component, so it is the right denominator for the per-component standard error.
d5 = d4 / np.sqrt(len(dirichlet))
print('dirichlet')
print(d1)
print(d2)
print(d3)
print(d4)
print(d5)
Standard t distribution
In [49]:
# Draw 100 observations from a standard Student's t distribution with 50 degrees of freedom.
standard_t = np.random.standard_t(df=50, size=100)
In [50]:
# Histogram of the standard t draws (matplotlib's default binning).
ax = plt.gca()
ax.hist(standard_t)
plt.show()
In [51]:
# Histogram of the standard t draws with the mean (solid line) and
# mean +/- one standard deviation (dashed lines) marked in blue.
ax = plt.gca()
ax.hist(standard_t)
centre = standard_t.mean()
spread = standard_t.std()  # ndarray.std() default, i.e. ddof=0
for position, dash in ((centre, 'solid'),
                       (centre + spread, 'dashed'),
                       (centre - spread, 'dashed')):
    ax.axvline(position, color='b', linestyle=dash, linewidth=2)
plt.show()
In [52]:
# Descriptive statistics of the standard t draws:
# mean, median, variance (ddof=0), standard deviation (ddof=1), standard error.
c1 = standard_t.mean()
c2 = np.median(standard_t)
c3 = standard_t.var()
c4 = standard_t.std(ddof=1)
c5 = c4 / np.sqrt(len(standard_t))
for stat in ('standard_t', c1, c2, c3, c4, c5):
    print(stat)

# Resample 100 values (with replacement) from the standard t draws and plot them.
sample = pd.DataFrame({'standard_t': np.random.choice(standard_t, size=100)})
sample.hist()
plt.show()

# Compare the resample's mean and standard deviation with the values above.
c6 = sample.mean()
c7 = sample.std(ddof=1)
print(c6, c7, sep='\n')
Logistic distribution
In [53]:
# Draw 100 observations from a logistic distribution with location 9 and scale 2.
logistic = np.random.logistic(loc=9, scale=2, size=100)
In [54]:
# Histogram of the logistic draws (matplotlib's default binning).
ax = plt.gca()
ax.hist(logistic)
plt.show()
In [55]:
# Histogram of the logistic draws with the mean (solid line) and
# mean +/- one standard deviation (dashed lines) marked in blue.
ax = plt.gca()
ax.hist(logistic)
centre = logistic.mean()
spread = logistic.std()  # ndarray.std() default, i.e. ddof=0
for position, dash in ((centre, 'solid'),
                       (centre + spread, 'dashed'),
                       (centre - spread, 'dashed')):
    ax.axvline(position, color='b', linestyle=dash, linewidth=2)
plt.show()
In [56]:
# Descriptive statistics of the logistic draws:
# mean, median, variance (ddof=0), standard deviation (ddof=1), standard error.
c1 = np.mean(logistic)
c2 = np.median(logistic)
c3 = np.var(logistic)
c4 = np.std(logistic, ddof=1)
c5 = np.std(logistic, ddof=1) / np.sqrt(len(logistic))
# Label the output with the distribution actually summarised here
# (it previously printed 'standard_t', copied from the preceding section).
print('logistic')
print(c1)
print(c2)
print(c3)
print(c4)
print(c5)
# Resample 100 values (with replacement) from the logistic draws.
sample = pd.DataFrame()
sample['logistic'] = np.random.choice(logistic, 100)
# Visualize the resample.
sample.hist()
plt.show()
# Compare the resample's mean and standard deviation with the values above.
c6 = sample.mean()
c7 = sample.std(ddof=1)
print(c6)
print(c7)
Rayleigh distribution
In [57]:
# Draw 100 observations from a Rayleigh distribution with scale 1.
rayleigh = np.random.rayleigh(scale=1, size=100)
In [58]:
# Histogram of the Rayleigh draws (matplotlib's default binning).
ax = plt.gca()
ax.hist(rayleigh)
plt.show()
In [59]:
# Histogram of the Rayleigh draws with the mean (solid line) and
# mean +/- one standard deviation (dashed lines) marked in blue.
ax = plt.gca()
ax.hist(rayleigh)
centre = rayleigh.mean()
spread = rayleigh.std()  # ndarray.std() default, i.e. ddof=0
for position, dash in ((centre, 'solid'),
                       (centre + spread, 'dashed'),
                       (centre - spread, 'dashed')):
    ax.axvline(position, color='b', linestyle=dash, linewidth=2)
plt.show()
In [60]:
# Descriptive statistics of the Rayleigh draws:
# mean, median, variance (ddof=0), standard deviation (ddof=1), standard error.
c1 = rayleigh.mean()
c2 = np.median(rayleigh)
c3 = rayleigh.var()
c4 = rayleigh.std(ddof=1)
c5 = c4 / np.sqrt(len(rayleigh))
for stat in ('rayleigh', c1, c2, c3, c4, c5):
    print(stat)

# Resample 100 values (with replacement) from the Rayleigh draws and plot them.
sample = pd.DataFrame({'rayleigh': np.random.choice(rayleigh, size=100)})
sample.hist()
plt.show()

# Compare the resample's mean and standard deviation with the values above.
c6 = sample.mean()
c7 = sample.std(ddof=1)
print(c6, c7, sep='\n')
Geometric distribution
In [61]:
# Draw 100 observations from a geometric distribution with success probability 0.25.
geometric = np.random.geometric(p=0.25, size=100)
In [62]:
# Histogram of the geometric draws (matplotlib's default binning).
ax = plt.gca()
ax.hist(geometric)
plt.show()
In [63]:
# Histogram of the geometric draws with the mean (solid line) and
# mean +/- one standard deviation (dashed lines) marked in blue.
ax = plt.gca()
ax.hist(geometric)
centre = geometric.mean()
spread = geometric.std()  # ndarray.std() default, i.e. ddof=0
for position, dash in ((centre, 'solid'),
                       (centre + spread, 'dashed'),
                       (centre - spread, 'dashed')):
    ax.axvline(position, color='b', linestyle=dash, linewidth=2)
plt.show()
In [64]:
# Descriptive statistics of the geometric draws:
# mean, median, variance (ddof=0), standard deviation (ddof=1), standard error.
c1 = geometric.mean()
c2 = np.median(geometric)
c3 = geometric.var()
c4 = geometric.std(ddof=1)
c5 = c4 / np.sqrt(len(geometric))
for stat in ('geometric', c1, c2, c3, c4, c5):
    print(stat)

# Resample 100 values (with replacement) from the geometric draws and plot them.
sample = pd.DataFrame({'geometric': np.random.choice(geometric, size=100)})
sample.hist()
plt.show()

# Compare the resample's mean and standard deviation with the values above.
c6 = sample.mean()
c7 = sample.std(ddof=1)
print(c6, c7, sep='\n')
In [65]:
# Two normally distributed variables of 1000 draws each:
# rand1 with mean 5 and standard deviation 0.5, rand2 with mean 10 and standard deviation 1.
rand1 = np.random.normal(loc=5, scale=0.5, size=1000)
rand2 = np.random.normal(loc=10, scale=1, size=1000)
In [66]:
# Third variable: the element-wise sum of the two normally distributed variables.
rand3 = np.add(rand1, rand2)
In [67]:
# Histogram of rand3 with 20 bins, drawn in cyan.
ax = plt.gca()
ax.hist(rand3, bins=20, color='c')
plt.show()
In [68]:
# Histogram of rand3 (20 bins, cyan) with the mean (solid line) and
# mean +/- one standard deviation (dashed lines) marked in blue.
ax = plt.gca()
ax.hist(rand3, bins=20, color='c')
centre = rand3.mean()
spread = rand3.std()  # ndarray.std() default, i.e. ddof=0
for position, dash in ((centre, 'solid'),
                       (centre + spread, 'dashed'),
                       (centre - spread, 'dashed')):
    ax.axvline(position, color='b', linestyle=dash, linewidth=2)
plt.show()
In [69]:
# Descriptive statistics of rand3:
# mean, median, variance (ddof=0), standard deviation (ddof=1), standard error.
a = rand3.mean()
b = np.median(rand3)
c = rand3.var()
d = rand3.std(ddof=1)
e = d / np.sqrt(len(rand3))
print(a, b, c, d, e, sep='\n')
In [70]:
# Resample 100 values (with replacement) from rand3 and plot them.
sample = pd.DataFrame({'variable3': np.random.choice(rand3, size=100)})
sample.hist()
plt.show()

# Mean and standard deviation (ddof=1) of the resample, for comparison with rand3 itself.
f = sample.mean()
g = sample.std(ddof=1)
print(f, g, sep='\n')
In [71]:
# Histograms of the three variables on shared axes, 20 bins each:
# rand1 in blue, rand2 in red, rand3 in cyan.
ax = plt.gca()
for values, colour in ((rand1, 'b'), (rand2, 'r'), (rand3, 'c')):
    ax.hist(values, bins=20, color=colour)
plt.show()
In [72]:
# Histograms of the three variables (rand1 blue, rand2 red, rand3 cyan), each with
# its mean (solid) and mean +/- one standard deviation (dashed) marked.
# Note that every marker line is drawn in blue, whichever variable it belongs to.
ax = plt.gca()
for values, colour in ((rand1, 'b'), (rand2, 'r'), (rand3, 'c')):
    ax.hist(values, bins=20, color=colour)
for values in (rand1, rand2, rand3):
    centre = values.mean()
    spread = values.std()  # ndarray.std() default, i.e. ddof=0
    ax.axvline(centre, color='b', linestyle='solid', linewidth=2)
    ax.axvline(centre + spread, color='b', linestyle='dashed', linewidth=2)
    ax.axvline(centre - spread, color='b', linestyle='dashed', linewidth=2)
plt.show()
In [73]:
# Descriptive statistics of rand2:
# mean, median, variance (ddof=0), standard deviation (ddof=1), standard error.
a2 = rand2.mean()
b2 = np.median(rand2)
c2 = rand2.var()
d2 = rand2.std(ddof=1)
e2 = d2 / np.sqrt(len(rand2))
for stat in ('rand2', a2, b2, c2, d2, e2):
    print(stat)

# Resample 100 values (with replacement) from rand2 and plot them.
sample = pd.DataFrame({'variable2': np.random.choice(rand2, size=100)})
sample.hist()
plt.show()

# Compare the resample's mean and standard deviation with the values above.
f2 = sample.mean()
g2 = sample.std(ddof=1)
print(f2, g2, sep='\n')
In [74]:
# Descriptive statistics of rand1:
# mean, median, variance (ddof=0), standard deviation (ddof=1), standard error.
a1 = rand1.mean()
b1 = np.median(rand1)
c1 = rand1.var()
d1 = rand1.std(ddof=1)
e1 = d1 / np.sqrt(len(rand1))
for stat in ('rand1', a1, b1, c1, d1, e1):
    print(stat)

# Resample 100 values (with replacement) from rand1 and plot them.
sample = pd.DataFrame({'variable1': np.random.choice(rand1, size=100)})
sample.hist()
plt.show()

# Compare the resample's mean and standard deviation with the values above.
f1 = sample.mean()
g1 = sample.std(ddof=1)
print(f1, g1, sep='\n')
In [ ]: